########## GLOBALS ##########
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the caller's objects but does not detach packages or reset
# options, so it does not give a clean session. Run this script in a fresh
# R session instead.
# Load packages
library(igraph)
library(dplyr)
library(estimatr)
library(ggplot2)
library(grid)
#library(ggfortify)
# Load data files
# Path is relative to the current working directory
edgelist <- read.csv("data/edgelist.csv")
########## GLOBALS ##########
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the caller's objects but does not detach packages or reset
# options, so it does not give a clean session. Run this script in a fresh
# R session instead.
# Load packages
library(igraph)
library(dplyr)
library(estimatr)
library(ggplot2)
library(grid)
#library(ggfortify)
# Load data files
# Paths are relative to the current working directory
edgelist <- read.csv("data/edgelist.csv")
county_incomes <- read.csv("data/saipe_incomes.csv")
# Restrict to desired cols
# Column order matters: graph_from_data_frame() reads the first two columns
# as the edge endpoints and treats the remaining ones as edge attributes
edgelist <- edgelist[, c("start_id", "end_id", "state_county")]
########## GRAPH SETUP ##########
# Build a directed graph from the edge list; the vertex set is taken from
# the IDs that appear in the first two columns
transit_g <- graph_from_data_frame(
  d = edgelist,
  directed = TRUE,
  vertices = NULL
)
# Pair every endpoint ID (all start IDs, then all end IDs) with the
# state_county value of the edge row it appears in
vertex_data <- data.frame(
  id = c(edgelist$start_id, edgelist$end_id),
  state_county = c(edgelist$state_county, edgelist$state_county)
)
# Keep the first row seen for each ID, so an ID that occurs as a start ID
# takes its state_county from its first start-side edge
vertex_data <- vertex_data[!duplicated(vertex_data$id), ]
# Attach state_county to the vertices, aligned on vertex name
V(transit_g)$state_county <- with(
  vertex_data,
  state_county[match(V(transit_g)$name, id)]
)
# The edge-level copy of state_county is no longer needed
transit_g <- delete_edge_attr(transit_g, "state_county")
rm(vertex_data)
# Vertex-level centrality measures (igraph defaults)
V(transit_g)$degree <- degree(transit_g)
V(transit_g)$closeness <- closeness(transit_g)
########## REFORMAT DATA ##########
# One row per vertex, holding its county key and both centrality measures
vertex_df <- data.frame(
  state_county = V(transit_g)$state_county,
  degree = V(transit_g)$degree,
  closeness = V(transit_g)$closeness
)
# Roll the vertices up to one row per county. The `wavg_*` columns are plain
# sums, i.e. the county mean with the division by `num_nodes` cancelled out.
county_agg <- as.data.frame(
  summarise(
    group_by(vertex_df, state_county),
    num_nodes = n(),
    avg_degree = mean(degree, na.rm = TRUE),
    avg_closeness = mean(closeness, na.rm = TRUE),
    wavg_degree = sum(degree, na.rm = TRUE),
    wavg_closeness = sum(closeness, na.rm = TRUE)
  )
)
# State code: first two characters of state_county with any "_" stripped
county_agg$state <- gsub("_", "", substr(county_agg$state_county, 1, 2))
# Bring in the income table; counties without a match keep NA incomes
county_agg <- left_join(county_agg, county_incomes, by = "state_county")
# Keep the relevant columns and give them their final names in one step
county_agg <- setNames(
  county_agg[, c(
    "state",
    "Name",
    "state_county",
    "Median.Household.Income",
    "num_nodes",
    "avg_degree",
    "avg_closeness",
    "wavg_degree",
    "wavg_closeness"
  )],
  c(
    "state_fips",
    "county_name",
    "state_county",
    "median_income",
    "num_nodes",
    "avg_degree",
    "avg_closeness",
    "wavg_degree",
    "wavg_closeness"
  )
)
# Strip thousands separators, then convert median income to numeric
county_agg$median_income <- as.numeric(
  gsub(",", "", county_agg$median_income)
)
# The raw income table is no longer needed
rm(county_incomes)
########## TRANSFORMATIONS ##########
# Log-transform the summed ("weighted") centrality columns and median income
county_agg[["log.weighted_avg_degree"]] <- log(county_agg[["wavg_degree"]])
# Shift closeness by 1 before logging so a zero sum does not become -Inf
county_agg[["log.weighted_avg_closeness"]] <- log(
  county_agg[["wavg_closeness"]] + 1
)
county_agg[["log.median_income"]] <- log(county_agg[["median_income"]])
########## TRANSFORMATION VISUALS ##########
# Build one themed histogram panel.
#   data     data frame that holds the column to plot
#   column   name of the column to plot, as a string
#   title    plot title; "" when the panel has none
#   y_label  y-axis title; "" when the panel has none
#   bins     number of histogram bins. 30 is ggplot2's own default; passing
#            it explicitly keeps the output identical while silencing the
#            "`stat_bin()` using `bins = 30`" message on every panel.
# Returns a ggplot object. The x-axis title is always NULL; plot and y-axis
# titles double as the column/row labels of the final 2 x 3 figure.
transformation_hist <- function(data, column, title = "", y_label = "",
                                bins = 30) {
  ggplot(data, aes(x = .data[[column]])) +
    geom_histogram(
      bins = bins,
      fill = "blue",
      color = "black",
      alpha = 0.15
    ) +
    theme_bw(base_size = 15) +
    theme(
      plot.title = element_text(face = "bold", size = rel(0.6), hjust = 0.5),
      axis.title = element_text(face = "bold", size = rel(0.6), hjust = 0.5),
      axis.text = element_text(size = rel(0.5)),
      panel.grid = element_blank()
    ) +
    labs(title = title, x = NULL, y = y_label)
}
# Top row: variables before transformation
before_degree <- transformation_hist(
  county_agg, "avg_degree",
  title = "Avg. Degree", y_label = "Before"
)
before_closeness <- transformation_hist(
  county_agg, "avg_closeness",
  title = "Avg. Closeness"
)
before_income <- transformation_hist(
  county_agg, "median_income",
  title = "Median Income"
)
# Bottom row: the logged variables
after_degree <- transformation_hist(
  county_agg, "log.weighted_avg_degree",
  y_label = "After"
)
after_closeness <- transformation_hist(
  county_agg, "log.weighted_avg_closeness"
)
after_income <- transformation_hist(county_agg, "log.median_income")
# Arrange the six panels in a 2 x 3 grid and write them to a PDF.
# grid.arrange() belongs to gridExtra, which this script never loads, so the
# layout is built from grid viewports instead (grid is loaded at the top of
# the script). Panels are placed row by row, which is the order
# grid.arrange() would have used.
invisible(local({
  panels <- list(
    before_degree,
    before_closeness,
    before_income,
    after_degree,
    after_closeness,
    after_income
  )
  n_row <- 2L
  n_col <- 3L
  pdf(
    "../Output/Data_Transformation_Figures/transformations.pdf",
    width = 6,
    height = 4
  )
  tryCatch(
    {
      grid.newpage()
      pushViewport(viewport(layout = grid.layout(nrow = n_row, ncol = n_col)))
      for (i in seq_along(panels)) {
        # Passing `vp` makes print() draw into that layout cell rather than
        # starting a new page
        print(
          panels[[i]],
          vp = viewport(
            layout.pos.row = (i - 1L) %/% n_col + 1L,
            layout.pos.col = (i - 1L) %% n_col + 1L
          )
        )
      }
      popViewport()
    },
    # Always close the device so the PDF is flushed to disk, even on error
    finally = dev.off()
  )
}))
# NOTE(review): kept from the original session; nothing visible in this
# script uses ggfortify -- confirm whether it is still needed
library(ggfortify)
########## GLOBALS ##########
# The workspace is deliberately not wiped with rm(list = ls()): that call
# deletes the caller's objects but does not detach packages or reset
# options, so it does not give a clean session. Run this script in a fresh
# R session instead.
# Load packages
library(igraph)
library(dplyr)
library(estimatr)
library(ggplot2)
library(grid)
#library(ggfortify)
# Load data files
# Paths are relative to the current working directory
edgelist <- read.csv("data/edgelist.csv")
county_incomes <- read.csv("data/saipe_incomes.csv")
# Restrict to desired cols
# Column order matters: graph_from_data_frame() reads the first two columns
# as the edge endpoints and treats the remaining ones as edge attributes
edgelist <- edgelist[, c("start_id", "end_id", "state_county")]
########## GRAPH SETUP ##########
# Build a directed graph from the edge list; the vertex set is taken from
# the IDs that appear in the first two columns
transit_g <- graph_from_data_frame(
  d = edgelist,
  directed = TRUE,
  vertices = NULL
)
# Pair every endpoint ID (all start IDs, then all end IDs) with the
# state_county value of the edge row it appears in
vertex_data <- data.frame(
  id = c(edgelist$start_id, edgelist$end_id),
  state_county = c(edgelist$state_county, edgelist$state_county)
)
# Keep the first row seen for each ID, so an ID that occurs as a start ID
# takes its state_county from its first start-side edge
vertex_data <- vertex_data[!duplicated(vertex_data$id), ]
# Attach state_county to the vertices, aligned on vertex name
V(transit_g)$state_county <- with(
  vertex_data,
  state_county[match(V(transit_g)$name, id)]
)
# The edge-level copy of state_county is no longer needed
transit_g <- delete_edge_attr(transit_g, "state_county")
rm(vertex_data)
# Vertex-level centrality measures (igraph defaults)
V(transit_g)$degree <- degree(transit_g)
V(transit_g)$closeness <- closeness(transit_g)
########## REFORMAT DATA ##########
# One row per vertex, holding its county key and both centrality measures
vertex_df <- data.frame(
  state_county = V(transit_g)$state_county,
  degree = V(transit_g)$degree,
  closeness = V(transit_g)$closeness
)
# Roll the vertices up to one row per county. The `wavg_*` columns are plain
# sums, i.e. the county mean with the division by `num_nodes` cancelled out.
county_agg <- as.data.frame(
  summarise(
    group_by(vertex_df, state_county),
    num_nodes = n(),
    avg_degree = mean(degree, na.rm = TRUE),
    avg_closeness = mean(closeness, na.rm = TRUE),
    wavg_degree = sum(degree, na.rm = TRUE),
    wavg_closeness = sum(closeness, na.rm = TRUE)
  )
)
# State code: first two characters of state_county with any "_" stripped
county_agg$state <- gsub("_", "", substr(county_agg$state_county, 1, 2))
# Bring in the income table; counties without a match keep NA incomes
county_agg <- left_join(county_agg, county_incomes, by = "state_county")
# Keep the relevant columns and give them their final names in one step
county_agg <- setNames(
  county_agg[, c(
    "state",
    "Name",
    "state_county",
    "Median.Household.Income",
    "num_nodes",
    "avg_degree",
    "avg_closeness",
    "wavg_degree",
    "wavg_closeness"
  )],
  c(
    "state_fips",
    "county_name",
    "state_county",
    "median_income",
    "num_nodes",
    "avg_degree",
    "avg_closeness",
    "wavg_degree",
    "wavg_closeness"
  )
)
# Strip thousands separators, then convert median income to numeric
county_agg$median_income <- as.numeric(
  gsub(",", "", county_agg$median_income)
)
# The raw income table is no longer needed
rm(county_incomes)
########## TRANSFORMATIONS ##########
# Log-transform the summed ("weighted") centrality columns and median income
county_agg[["log.weighted_avg_degree"]] <- log(county_agg[["wavg_degree"]])
# Shift closeness by 1 before logging so a zero sum does not become -Inf
county_agg[["log.weighted_avg_closeness"]] <- log(
  county_agg[["wavg_closeness"]] + 1
)
county_agg[["log.median_income"]] <- log(county_agg[["median_income"]])
########## TRANSFORMATION VISUALS ##########
# Build one themed histogram panel.
#   data     data frame that holds the column to plot
#   column   name of the column to plot, as a string
#   title    plot title; "" when the panel has none
#   y_label  y-axis title; "" when the panel has none
#   bins     number of histogram bins. 30 is ggplot2's own default; passing
#            it explicitly keeps the output identical while silencing the
#            "`stat_bin()` using `bins = 30`" message on every panel.
# Returns a ggplot object. The x-axis title is always NULL; plot and y-axis
# titles double as the column/row labels of the final 2 x 3 figure.
transformation_hist <- function(data, column, title = "", y_label = "",
                                bins = 30) {
  ggplot(data, aes(x = .data[[column]])) +
    geom_histogram(
      bins = bins,
      fill = "blue",
      color = "black",
      alpha = 0.15
    ) +
    theme_bw(base_size = 15) +
    theme(
      plot.title = element_text(face = "bold", size = rel(0.6), hjust = 0.5),
      axis.title = element_text(face = "bold", size = rel(0.6), hjust = 0.5),
      axis.text = element_text(size = rel(0.5)),
      panel.grid = element_blank()
    ) +
    labs(title = title, x = NULL, y = y_label)
}
# Top row: variables before transformation
before_degree <- transformation_hist(
  county_agg, "avg_degree",
  title = "Avg. Degree", y_label = "Before"
)
before_closeness <- transformation_hist(
  county_agg, "avg_closeness",
  title = "Avg. Closeness"
)
before_income <- transformation_hist(
  county_agg, "median_income",
  title = "Median Income"
)
# Bottom row: the logged variables
after_degree <- transformation_hist(
  county_agg, "log.weighted_avg_degree",
  y_label = "After"
)
after_closeness <- transformation_hist(
  county_agg, "log.weighted_avg_closeness"
)
after_income <- transformation_hist(county_agg, "log.median_income")
# Arrange the six panels in a 2 x 3 grid and write them to a PDF.
# grid.arrange() belongs to gridExtra, which this script never loads, so the
# layout is built from grid viewports instead. Panels are placed row by row,
# which is the order grid.arrange() would have used.
# grid is attached here, before its first use in this section
library(grid)
invisible(local({
  panels <- list(
    before_degree,
    before_closeness,
    before_income,
    after_degree,
    after_closeness,
    after_income
  )
  n_row <- 2L
  n_col <- 3L
  pdf(
    "../Output/Data_Transformation_Figures/transformations.pdf",
    width = 6,
    height = 4
  )
  tryCatch(
    {
      grid.newpage()
      pushViewport(viewport(layout = grid.layout(nrow = n_row, ncol = n_col)))
      for (i in seq_along(panels)) {
        # Passing `vp` makes print() draw into that layout cell rather than
        # starting a new page
        print(
          panels[[i]],
          vp = viewport(
            layout.pos.row = (i - 1L) %/% n_col + 1L,
            layout.pos.col = (i - 1L) %% n_col + 1L
          )
        )
      }
      popViewport()
    },
    # Always close the device so the PDF is flushed to disk, even on error
    finally = dev.off()
  )
}))
